// Merge application data with assignment data to identify which applications were assigned to firms

// Load the 2014 application file and restrict it to usable records:
// filings dated 1 Jan 2001 or later, with a known filing date and a numeric examiner ID.
use "${rawdata}2014/application_data.dta", clear

// Date filters. A missing date compares greater than any real date in Stata, so the
// cutoff alone does not remove it; missing dates are flagged and removed separately.
keep if filing_date >= td("01jan2001")
gen missing_filingdate = (filing_date == .)
keep if missing_filingdate == 0
gen filing_year = year(filing_date)

// Examiner IDs: under -force-, non-numeric values become missing and are then removed.
destring examiner_id, replace force
keep if examiner_id != .

// 1 when the disposal type is "ISS", 0 otherwise.
gen patent_issued = disposal_type == "ISS"

// Attach examiner gender by first name.
// The name lookup file is keyed on inventor_name_first, so the examiner first-name
// variable borrows that name for the merge and is renamed back afterwards.
rename examiner_name_first inventor_name_first
// keep(master match) discards lookup-only names; nogenerate skips creating _merge
merge m:1 inventor_name_first using "${filedata}all_gender_namecaps.dta", keep(master match) nogenerate
rename inventor_name_first examiner_name_first
rename female examiner_female
// remove the filing-date flag and the lookup columns that are not needed further on
drop missing_filingdate percent_female total_frequency

// Bring in the disambiguated firm assignments. The merge is 1:m, so an application
// may occupy several rows afterwards (one per matching assignment row).
// keep(master match) discards assignment rows that have no application record.
merge 1:m application_number using "${filedata}disambig_firms_app", keep(master match)
// emp_assigned: 1 when the application matched an assignment row, 0 otherwise
gen emp_assigned = _merge == 3
drop _merge

// Firm-level totals, computed over all rows sharing a bus_tag.
// Both totals are left missing on rows that did not match an assignment.
sort bus_tag

// firm_appct: number of rows carrying the firm's bus_tag
by bus_tag: gen firm_appct = _N if emp_assigned != 0

// firm_patct: sum of patent_issued over the firm's rows
// (the sum runs over the whole bus_tag group first; unassigned rows are blanked after)
by bus_tag: egen firm_patct = total(patent_issued)
replace firm_patct = . if emp_assigned == 0

// Now keep one row per application: the row for its biggest assignee.
//
// Rows within an application are ordered by firm_appct, then firm_patct, then
// bus_tag, and the last row is kept. That is the firm with the most applications,
// ties broken by the most patents, remaining ties broken by bus_tag so the choice
// is the same on every run.
//
// The previous version numbered the rows (apptag) before applying the patent-count
// tie-break and then kept apptag == 1 regardless, so the tie-break never took
// effect and the surviving row depended on an unstable sort. It also overwrote
// firm_patct with the maximum taken over all of the application's assignees,
// which could be the count of a firm whose row had already been dropped. Here the
// kept row simply retains its own firm's firm_appct and firm_patct.
//
// NOTE(review): rows with identical application_number and bus_tag are still
// indistinguishable to this sort -- confirm disambig_firms_app has at most one row
// per application/firm pair if other columns from that file matter downstream.
bysort application_number (firm_appct firm_patct bus_tag): keep if _n == _N

// apptag is always 1 now; it is kept so the saved file has the same variables as before
gen apptag = 1

// firmtag marks exactly one row per bus_tag (the lowest application_number) among
// rows that matched an assignment; it is 0 everywhere else
bysort bus_tag (application_number): gen firmtag = (_n == 1)
replace firmtag = 0 if emp_assigned == 0

save "${filedata}2014application_emp.dta", replace

